FIGURE 1 



TGGCCTCCCCAGCTTGCCAGGCACAAGGCTGAGCGGGAGGAAGCGAGAGGCATCTA 

AGCAGGCAGTGTTTTGCCTTCACCCCAAGTGAC CATGA GAGGTGCCACGCGAGTCTC 

AATCATGCTCCTCCTAGTAACTGTGTCTGACTGTGCTGTGATCACAGGGGCCTGTGA 

GCGGGATGTCCAGTGTGGGGCAGGCACCTGCTGTGCCATCAGCCTGTGGCTTCGAGG 

GCTGCGGATGTGCACCCCGCTGGGGCGGGAAGGCGAGGAGTGCCACCCCGGCAGCC 

ACAAGGTCCCCTTCTTCAGGAAACGCAAGCACCACACCTGTCCTTGCTTGCCCAACC 

TGCTGTGCTCCAGGTTCCCGGACGGCAGGTACCGCTGCTCCATGGACTTGAAGAACA 

TCAATTTTTAGGCGCTTGCCTGGTCTCAGGATACCCACCATCCTTTTCCTGAGCACAG 

CCTGGATTTTTATTTCTGCCATGAAACCCAGCTCCCATGACTCTCCCAGTCCCTACAC 

TGACTACCCTGATCTCTCTTGTCTAGTACGCACATATGCACACAGGCAGACATACCT 

CCCATCATGACATGGTCCCCAGGCTGGCCTGAGGATGTCACAGCTTGAGGCTGTGGT 

GTGAAAGGTGGCCAGCCTGGTTCTCTTCCCTGCTCAGGCTGCCAGAGAGGTGGTAAA 

TGGCAGAAAGGACATTCCCCCTCCCCTCCCCAGGTGACCTGCTCTCTTTCCTGGGCCC 

TGCCCCTCTCCCCACATGTATCCCTCGGTCTGAATTAGACATTCCTGGGCACAGGCTC 

TTGGGTGCATTGCTCAGAGTCCCAGGTCCTGGCCTGACCCTCAGGCCCTTCACGTGA 

GGTCTGTGAGGACCAATTTGTGGGTAGTTCATCTTCCCTCGATTGGTTAACTCCTTAG 

TTTCAGACCACAGACTCAAGATTGGCTCTTCCCAGAGGGCAGCAGACAGTCACCCCA 

AGGCAGGTGTAGGGAGCCCAGGGAGGCCAATCAGCCCCCTGAAGACTCTGGTCCCA 

GTCAGCCTGTGGCTTGTGGCCTGTGACCTGTGACCTTCTGCCAGAATTGTCATGCCTC 

TGAGGCCCCCTCTTACCACACTTTACCAGTTAACCACTGAAGCCCCCAATTCCCACA 

GCTTTTCCATTAAAATGCAAATGGTGGTGGTTCAATCTAATCTGATATTGACATATTA 

GAAGGCAATTAGGGTGTTTCCTTAAACAACTCCTTTCCAAGGATCAGCCCTGAGAGC 

AGGTTGGTGACTTTGAGGAGGGCAGTCCTCTGTCCAGATTGGGGTGGGAGCAAGGG 

ACAGGGAGCAGGGCAGGGGCTGAAAGGGGCACTGATTCAGACCAGGGAGGCAACT 

ACACACCAACATGCTGGCTTTAGAATAAAAGCACCAACTGAAAAAA 
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MRGATRVSMLLLVTVSDCAVITGACERDVQCGAGTCCAISLWLRGLRMCTPLGREGEE 
C 

HPGSHKVPFFRKRKHHTCPCLPNLLCSRFPDGRYRCSMDLKNINF 



Important features: 

Signal peptide: 
1-19 



N-myristoylation sites: 
33 
35 
46 
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PRO XXXXXXXXXXXXXXX (Length = 1 5 amino acids) 

Comparison Protein XXXXXYYYYYYY (Length = 12 amino acids) 

% amino acid sequence identity = 

(the number of identically matching amino acid residues between the two polypeptide sequences 
as determined by ALIGN-2) divided by (the total number of amino acid residues of the PRO 
polypeptide) = 



5 divided by 15 = 33.3% 
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PRO XXXXXXXXXX (Length = 10 amino acids) 

Comparison Protein XXXXXYYYYYYZZYZ (Length = 15 amino acids) 

% amino acid sequence identity = 

(the number of identically matching amino acid residues between the two polypeptide sequences 
as determined by ALIGN-2) divided by (the total number of amino acid residues of the PRO 
polypeptide) = 

5 divided by 10 = 50% 
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PRO-DNA NNNNNNNNNNNNNN (Length = 14 nucleotides) 

Comparison DNA NNNNNNLLLLLLLLLL (Length = 16 nucleotides) 

% nucleic acid sequence identity = 

(the number of identically matching nucleotides between the two nucleic acid sequences as 
determined by ALIGN-2) divided by (the total number of nucleotides of the PRO-DNA nucleic 
acid sequence) = 

6 divided by 14 = 42.9% 
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PRO-DNA NNNNNr>WNNNNN (Length = 12 nucleotides) 

Comparison DNA NNNNLLLVV (Length = 9 nucleotides) 

% nucleic acid sequence identity = 

(the number of identically matching nucleotides between the two nucleic acid sequences as 
determined by ALIGN-2) divided by (the total number of nucleotides of the PRO-DNA nucleic 
acid sequence) = 

4 divided by 12 = 33.3% 
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TGGCTCCCCAGCTTGCCAGGCACAAGGCTGAGCTGGAGGAAGCGAGANGCATCTAA 
GCAG 

GCAGTGTTTTGCCTTCACCCCAAGTGACCATGAGAGGTGCCACGCGAGTCTCAATCA 
TGC 

TCCTCCTAGTAACTGTGTCTGACTGTGCTGTGATCACAGGGGCCTGTGAGCGGGATG 
TCC 

AGTGTGGGGCAGGCACCTGCTGTGCCATCAGCCTGTGGCTTCGAGGGCTGCGGATGT 
GCA 

CCCCGCTGGGGCGGGAAGGCGAGGAGTGCCACCCCGGCAGCCACAAGGTCCCCTTC 
TTCA 

GGAAACGCAAGCACCACACCTGTCTTGTTGCCCAACCTGCTGTGCTCCAGTTCCGGA 
CGG 

CAGTACGCTGCTCA 
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FIGURE 1 6 A-C 

TGGCCTCCCCAGCTTGCCAGGCACAAGGCTGAGCGGGAGGAAGCGAGAGG 50 
CATCTAAGCAGGCAGTGTTTTGCCTTCACCCCAAGTGACCATGAGAGGTG 

M R G 

CCACGCGAGTCTCAATCATGCTCCTCCTAGTAACTGTGTCTGACTGTGCT 
ATRVSIMLLLVTVSDC A 
GTGATCACAGGGGCCTGTGAGCGGGATGTCCAGTGTGGGGCAGGCACCTG 200 

VITGACERDVQCGAGTC 
CTGTGCCATCAGCCTGTGGCTTCGAGGGCTGCGGATGTGCACCCCGCTGG 

CAISLWLRGLRMCTPL 
GGCGGGAAGGCGAGGAGTGCCACCCCGGCAGCCACAAGGTCCCCTTCTTC 
GREGEECHPGSHKVPFF 
AGGAAACGCAAGCACCACACCTGTCCTTGCTTGCCCAACCTGCTGTGCTC 
RKRKHHTCPCLPNLLCS 
CAGGTTCCCGGACGGCAGGTACCGCTGCTCCATGGACTTGAAGAACATCA 400 

RFPDGRYRCSMDLKNI 
ATTTTTAGGCGCTTGCCTGGTCTCAGGATACCCACCATCCTTTTCCTGAG 
N F * 

CACAGCCTGGATTTTTATTTCTGCCATGAAACCCAGCTCCCATGACTCTC 

CCAGTCCCTACACTGACTACCCTGATCTCTCTTGTCTAGTACGCACATAT 

GCACACAGGCAGACATACCTCCCATCATGACATGGTCCCCAGGCTGGCCT 600 

GAGGATGTCACAGCTTGAGGCTGTGGTGTGAAAGGTGGCCAGCCTGGTTC 

TCTTCCCTGCTCAGGCTGCCAGAGAGGTGGTAAATGGCAGAAAGGACATT 

CCCCCTCCCCTCCCCAGGTGACCTGCTCTCTTTCCTGGGCCCTGCCCCTC 

TCCCCACATGTATCCCTCGGTCTGAATTAGACATTCCTGGGCACAGGCTC 800 

TTGGGTGCATTGCTCAGAGTCCCAGGTCCTGGCCTGACCCTCAGGCCCTT 

CACGTGAGGTCTGTGAGGACCAATTTGTGGGTAGTTCATCTTCCCTCGAT 

TGGTTAACTCCTTAGTTTCAGACCACAGACTCAAGATTGGCTCTTCCCAG 

AGGGCAGCAGACAGTCACCCCAAGGCAGGTGTAGGGAGCCCAGGGAGGCC 1000 

AATCAGCCCCCTGAAGACTCTGGTCCCAGTCAGCCTGTGGCTTGTGGCCT 

GTGACCTGTGACCTTCTGCCAGAATTGTCATGCCTCTGAGGCCCCCTCTT 

ACCACACTTTACCAGTTAACCACTGAAGCCCCCAATTCCCACAGCTTTTC 

CATTAAAATGCAAATGGTGGTGGTTCAATCTAATCTGATATTGACATATT 1200 

AGAAGGCAATTAGGGTGTTTCCTTAAACAACTCCTTTCCAAGGATCAGCC 

CTGAGAGCAGGTTGGTGACTTTGAGGAGGGCAGTCCTCTGTCCAGATTGG 

GGTGGGAGCAAGGGACAGGGAGCAGGGCAGGGGCTGAAAGGGGCACTGAT 

TCAGACCAGGGAGGCAACTACACACCAACATGCTGGCTTTAGAATAAAAG 1400 

CACCAACTGAAAAAA 
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FIGURE 17 B 



normalized luciferase activity 
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/* 

* 

* C-C increased from 12 to 15 

* Z is average of EQ 

* B is average of ND 

* match with stop is _M; stop-stop = 0; J (joker) match = 0 
*/ 

#define _M	(-8)	/* value of a match with a stop */

/*
 * Residue substitution scores, indexed [a - 'A'][b - 'A'] for upper-case
 * letters a and b.  Column/row 'O' holds the stop score: every letter
 * against 'O' scores _M, and 'O' against 'O' scores 0.  Rows J, U and X
 * score 0 against every letter except 'O'.
 */
int	_day[26][26] = {
/*	  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
/* A */	{ 2, 0,-2, 0, 0,-4, 1,-1,-1, 0,-1,-2,-1, 0,_M, 1, 0,-2, 1, 1, 0, 0,-6, 0,-3, 0},
/* B */	{ 0, 3,-4, 3, 2,-5, 0, 1,-2, 0, 0,-3,-2, 2,_M,-1, 1, 0, 0, 0, 0,-2,-5, 0,-3, 1},
/* C */	{-2,-4,15,-5,-5,-4,-3,-3,-2, 0,-5,-6,-5,-4,_M,-3,-5,-4, 0,-2, 0,-2,-8, 0, 0,-5},
/* D */	{ 0, 3,-5, 4, 3,-6, 1, 1,-2, 0, 0,-4,-3, 2,_M,-1, 2,-1, 0, 0, 0,-2,-7, 0,-4, 2},
/* E */	{ 0, 2,-5, 3, 4,-5, 0, 1,-2, 0, 0,-3,-2, 1,_M,-1, 2,-1, 0, 0, 0,-2,-7, 0,-4, 3},
/* F */	{-4,-5,-4,-6,-5, 9,-5,-2, 1, 0,-5, 2, 0,-4,_M,-5,-5,-4,-3,-3, 0,-1, 0, 0, 7,-5},
/* G */	{ 1, 0,-3, 1, 0,-5, 5,-2,-3, 0,-2,-4,-3, 0,_M,-1,-1,-3, 1, 0, 0,-1,-7, 0,-5, 0},
/* H */	{-1, 1,-3, 1, 1,-2,-2, 6,-2, 0, 0,-2,-2, 2,_M, 0, 3, 2,-1,-1, 0,-2,-3, 0, 0, 2},
/* I */	{-1,-2,-2,-2,-2, 1,-3,-2, 5, 0,-2, 2, 2,-2,_M,-2,-2,-2,-1, 0, 0, 4,-5, 0,-1,-2},
/* J */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* K */	{-1, 0,-5, 0, 0,-5,-2, 0,-2, 0, 5,-3, 0, 1,_M,-1, 1, 3, 0, 0, 0,-2,-3, 0,-4, 0},
/* L */	{-2,-3,-6,-4,-3, 2,-4,-2, 2, 0,-3, 6, 4,-3,_M,-3,-2,-3,-3,-1, 0, 2,-2, 0,-1,-2},
/* M */	{-1,-2,-5,-3,-2, 0,-3,-2, 2, 0, 0, 4, 6,-2,_M,-2,-1, 0,-2,-1, 0, 2,-4, 0,-2,-1},
/* N */	{ 0, 2,-4, 2, 1,-4, 0, 2,-2, 0, 1,-3,-2, 2,_M,-1, 1, 0, 1, 0, 0,-2,-4, 0,-2, 1},
/* O */	{_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M, 0,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M,_M},
/* P */	{ 1,-1,-3,-1,-1,-5,-1, 0,-2, 0,-1,-3,-2,-1,_M, 6, 0, 0, 1, 0, 0,-1,-6, 0,-5, 0},
/* Q */	{ 0, 1,-5, 2, 2,-5,-1, 3,-2, 0, 1,-2,-1, 1,_M, 0, 4, 1,-1,-1, 0,-2,-5, 0,-4, 3},
/* R */	{-2, 0,-4,-1,-1,-4,-3, 2,-2, 0, 3,-3, 0, 0,_M, 0, 1, 6, 0,-1, 0,-2, 2, 0,-4, 0},
/* S */	{ 1, 0, 0, 0, 0,-3, 1,-1,-1, 0, 0,-3,-2, 1,_M, 1,-1, 0, 2, 1, 0,-1,-2, 0,-3, 0},
/* T */	{ 1, 0,-2, 0, 0,-3, 0,-1, 0, 0, 0,-1,-1, 0,_M, 0,-1,-1, 1, 3, 0, 0,-5, 0,-3, 0},
/* U */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* V */	{ 0,-2,-2,-2,-2,-1,-1,-2, 4, 0,-2, 2, 2,-2,_M,-1,-2,-2,-1, 0, 0, 4,-6, 0,-2,-2},
/* W */	{-6,-5,-8,-7,-7, 0,-7,-3,-5, 0,-3,-2,-4,-4,_M,-6,-5, 2,-2,-5, 0,-6,17, 0, 0,-6},
/* X */	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,_M, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
/* Y */	{-3,-3, 0,-4,-4, 7,-5, 0,-1, 0,-4,-1,-2,-2,_M,-5,-4,-4,-3,-3, 0,-2, 0, 0,10,-4},
/* Z */	{ 0, 1,-5, 2, 3,-5, 0, 2,-2, 0, 0,-2,-1, 1,_M, 0, 3, 0, 0, 0, 0,-2,-6, 0,-4, 4}
};
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/* 
*/ 

#include <stdio.h>
#include <ctype.h>

#define MAXJMP	16	/* max jumps in a diag */
#define MAXGAP	24	/* don't continue to penalize gaps larger than this */
#define JMPS	1024	/* max jmps in an path */
#define MX	4	/* save if there's at least MX-1 bases since last jmp */

#define DMAT	3	/* value of matching bases */
#define DMIS	0	/* penalty for mismatched bases */
#define DINS0	8	/* penalty for a gap */
#define DINS1	1	/* penalty per base */
#define PINS0	8	/* penalty for a gap */
#define PINS1	4	/* penalty per residue */

struct jmp {
	short		n[MAXJMP];	/* size of jmp (neg for dely) */
	unsigned short	x[MAXJMP];	/* base no. of jmp in seq x */
};					/* limits seq to 2^16 - 1 */

struct diag {
	int		score;		/* score at last jmp */
	long		offset;		/* offset of prev block */
	short		ijmp;		/* current jmp index */
	struct jmp	jp;		/* list of jmps */
};

struct path {
	int		spc;		/* number of leading spaces */
	short		n[JMPS];	/* size of jmp (gap) */
	int		x[JMPS];	/* loc of jmp (last elem before gap) */
};

/*
 * Program-wide state.
 * NOTE(review): these are definitions rather than extern declarations, so
 * including this header from several .c files relies on the linker merging
 * common symbols -- confirm the build flags before moving to a newer toolchain.
 */
char		*ofile;			/* output file name */
char		*namex[2];		/* seq names: getseqs() */
char		*prog;			/* prog name for err msgs */
char		*seqx[2];		/* seqs: getseqs() */
int		dmax;			/* best diag: nw() */
int		dmax0;			/* final diag */
int		dna;			/* set if dna: main() */
int		endgaps;		/* set if penalizing end gaps */
int		gapx, gapy;		/* total gaps in seqs */
int		len0, len1;		/* seq lens */
int		ngapx, ngapy;		/* total size of gaps */
int		smax;			/* max score: nw() */
int		*xbm;			/* bitmap for matching */
long		offset;			/* current offset in jmp file */
struct diag	*dx;			/* holds diagonals */
struct path	pp[2];			/* holds path for seqs */

/* old-style declarations: the callers cast the char * results as needed */
char		*calloc(), *malloc(), *index(), *strcpy();
char		*getseq(), *g_calloc();



Page 1 of nw.h 



FIGURE 20 C 



/* Needleman-Wunsch alignment program
 *
 * usage: progs file1 file2
 *  where file1 and file2 are two dna or two protein sequences.
 *  The sequences can be in upper- or lower-case and may contain ambiguity
 *  Any lines beginning with ';', '>' or '<' are ignored
 *  Max file length is 65535 (limited by unsigned short x in the jmp struct)
 *  A sequence with 1/3 or more of its elements ACGTU is assumed to be DNA
 *  Output is in the file "align.out"
 *
 * The program may create a tmp file in /tmp to hold info about traceback.
 * Original version developed under BSD 4.3 on a vax 8650
 */

#include "nw.h" 
#include "day.h" 



/*
 * Match bitmaps, indexed by letter - 'A'.  Two letters are treated as a
 * match when their bitmaps share a set bit.
 *
 * _dbval (nucleotides): A=1, C=2, G=4, T=U=8; the other non-zero entries
 * are ORs of those four bits.
 */
static int _dbval[26] = {
	1,14,2,13,0,0,4,11,0,0,12,0,3,15,0,0,0,5,6,8,8,7,9,0,10,0
};

/*
 * _pbval (residues): letter k owns bit 1<<k.  B additionally carries the
 * D and N bits, Z carries the E and Q bits, and J has every low bit set.
 */
static int _pbval[26] = {
	1, 2|(1<<('D'-'A'))|(1<<('N'-'A')), 4, 8, 16, 32, 64,
	128, 256, 0xFFFFFFF, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14,
	1<<15, 1<<16, 1<<17, 1<<18, 1<<19, 1<<20, 1<<21, 1<<22,
	1<<23, 1<<24, 1<<25|(1<<('E'-'A'))|(1<<('Q'-'A'))
};



main(ac, av) 

int ac; 
char *av[]; 



mam 



{ 



prog=av[0]; 
if(ac!= 3){ 

fprintf(stderr,"usage: %s filel file2\n", prog); 

fprintf(stderr,"where filel and file2 are two dna or two protein sequences.W); 
fprintf^stderr/'The sequences can be in upper- or lower-caseW); 
fprintf(stderr,"Any lines beginning with V or are ignoredW*); 
fprint^stderr/'Output is in the file V align. outV'W); 
exit(l); 

} 

namex[0] = av[l]; 

namex[l] = av[2]; 

seqx[0] - getseq(namex[0], &len0); 

seqx[l] = getseq(namex[l], &lenl); 

xbm = (dna)? _dbval : j>bval; 

endgaps =0; /* 1 to penalize endgaps */ 

ofile = "align.out"; /* output file */ 

nw(); /* fill «* & e matrix, get the possible jmps */ 

readjmpsO; /* get the actual jmps */ ( s 

printO; /* print stats, alignment */ 

- cleanup(O); /* unlink any tmp files */ 
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/* do the alignment, return best score: mainO 

* dna: values in Fitch and Smith, PNAS, 80, 1382-1386, 1983 
*pro: PAM 250 values 

* When scores are equal, we prefer mismatches to any gap, prefer 

* a new gap to extending an ongoing gap, and prefer a gap in seqx 

* to a gap in seq y. 



/* seqs and ptrs */ 
/* keep track of dely */ 
/* keep track of delx */ 
/* for swapping rowO, rowl */ 
/* score for each type */ 
f* insertion penalties */ 
/* diagonal index */ 
/* jmp index */ 
J* score for curr, last row */ 
/* index into seqs */ 

dx = (struct diag *)g_caIloc("to get diags", lenO+lenl+1, sizeof(struct diag)); 

ndely = (hit *)g_caIJoc( rt to get ndely", lenl+l ? sizeof(int)); 
dely = (int *)g_calloc("to get dely", len 1 + 1 , sizeof(int)); 
colO = (int *)g_calloc( H to get colO", Ienl + 1, sizeof(int)); 
coll - (int *)g_calloc("to get coll \ len 3+1, sizeof(int)); 
insO = (dna)? DINSO : PINSO; 
insl = (dna)? DINS 1 :PINS1; 

smax = -10000; 
if (endgaps) { 

for (co!0[0] - dely[0] - -insO, yy - 1 ; yy <= lenl ; yy++) { 
col0[yy] = dely[yy] = col0[yy-l] - insl; 
ndely[yy]-yy; 

} 

col0[0] - 0; /* Waterman Bull Math Biol 84 */ 



*/ 






nwQ 






{ 








char 


*px, *py; 




int 


*ndely, *dely; 




int 


ndelx, delx; 




int 


*tmp; 




int 


mis; 




lot 


insO, insl; 




register 


id; 




register 


ij; 




register 


*co!0, *coll; 




register 


xx, yy; 



} 

else 



for (yy = I ; yy <= ienl ; yy-H-) 



I* fill in match matrix 
*/ 

for (px = seqx[0], xx = 1 ; xx <= lenO; px-H-, xx++) { 
/* initialize first entry in col 
*/ 

if (endgaps) { 

if(xx=l) 

col 1 [0] = delx = -(insO+ins 1 ); 

else 

coll [0] = delx = col0[0J - insl ; 
ndelx = xx; 



} 

else { 



coll[0] = 0; 
delx = -insO; 
ndelx - 0; 
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...nw 

for (py - seqxfl], yy = 1 ; yy <= lenl ; py++, yy++) { 
mis = col0[yy-1]; 
if(dna) 

mis += (xbm[*px- , A I ]&xbm[*py-'A'])? DMAT : DMIS; 

else 

mis += _day[*px-'A'][*py-'A']; 

/* update penalty for del in x seq; 

* favor new del over ongong del 

* ignore MAXGAP if weighting endgaps 
*/ 

if (endgaps || ndely[yy] < MAXGAP) { 

if (col0[yy] - insO >= dely[yy]) { 

dely[yy] = col0[yy] - (insO+insl); 
ndely[yy] = 1; 

} else { 

delyfyy] -= insl ; 
ndely[yy]++; 

} 



} else { 



} 



if (col0[yy] - (insO+insl) >= dely[yy]) { 

dely[yy] = colOfyy] - (insO+insl); 
ndely[yy] = 1; 

} else 

ndely[yy]++; 



/* update penalty for del in y seq; 
* favor new del over ongong del 
*/ 

if (endgaps (| ndelx < MAXGAP) { 

if (coll [yy-1] - insO >= delx) { 

delx = coll [yy-1] - (insO+insl); 
ndelx = 1 ; 

} else { 

delx -= insl; 
ndelx-H-; 

} 



} else { 



if (coll [yy-1] - (insO+insl) >= delx) { 

delx = coll [yy-1] - (insO+insl); 
ndelx = 1 ; 

} else 

ndelx++; 



} 



/* pick the maximum score; we're favoring 
* mis over any del and delx over dely 
*/ 
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id = xx - yy + Ienl - 1; 

if (mis >= delx && mis >= delyfyy]) 

collfyy] ~ mis; 
else if (delx >- deiyfyy]) { 

col 1 [yy] - delx; 

ij = dx[id].ijmp; 

if (dx[id] jp.n[0] && (!dna || (ndelx >= MAXJMP 
&& xx > dx[id] jp.x[ij]+MX) j| mis > dx[id].score+DTNSO)) { 
dx[id].ijmp-H-; 
if(++ij>- MAXJMP) { 
writejmps(id); 
ij = dx[id].ijmp = 0; 
dx[id]. offset = offset; 

offset -H= sizeof(struct jmp) + sizeof(offset); 

} 

} 

dx[id] jp.n[ij] = ndelx; 
dx[id] jpxfij] = xx; 
dx[id].score = delx; 

} 

else { 

coll[yy] = dely[yy]; 
ij = dx[id] ijmp; 

if (dx[id] jp.n[0] && (Idna || (ndely[yy] >= MAXJMP 

&& xx > dx[id].jp.x[ij]+MX) |[ mis > dx[id] score+DINSO)) { 
dx[id].ijmp++; 
if(++ij>= MAXJMP) { 
writejmps(id); 
ij = dx[id].ijmp = 0, 
dx[id]. offset = offset; 

offset += sizeof(struct jmp) + sizeof(offset), 

} 

} 

dx[id] jp.n[ij] = -ndely[yy], 
dx[id] jp.x[ij] - xx; 
dx [id]. score = dely[yy]; 

} 

if (xx = len0 &&yy<lenl) { 
/* last col 
*/ 

if (endgaps) 

col 1 [yy] -= insO+ins 1 *(len 1 -yy); 
if (collfyy] > smax) { 

smax = coIl[yy]; 

dmax = id; 

} 

} 

} 

if (endgaps && xx < lenO) 

col 1 [yy- 1 ] -= insO+ins 1 *(len0-xx); 
if(coll[yy-l]>smax) { 

smax = collfyy-1]; 

dmax = id; 

} 

tmp = colO; colO = coll; coll = tmp; 

} 

(void) free((char *)ndely); 
(void) free((char *)dely); 
(void) free((cbar *)co!0); 

(void) free((char *)coll);} Page 4 of nw.c 
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/* 
* 

 * print() -- only routine visible outside this module
 *
 * static:
 * getmat() -- trace back best path, count matches: print()
 * pr_align() -- print alignment of described in array pp[]: print()
 * dumpblock() -- dump a block of lines with numbers, stars: pr_align()
 * nums() -- put out a number line: dumpblock()
 * putline() -- put out a line (name, [num], seq, [num]): dumpblock()
 * stars() -- put a line of stars: dumpblock()
 * stripname() -- strip any path and prefix from a seqname
*/ 



#include "nw.h" 



#define SPC	3
#define P_LINE	256	/* maximum output line */
#define P_SPC	3	/* space between name or num and seq */

extern int	_day[26][26];	/* substitution scores, defined with the table */

int	olen;			/* set output line length */
FILE	*fx;			/* output file */



pnnt() 
{ 



print 

int (x, ly, firstgap, lastgap; /* overlap */ 

if ((fx = fopen(ofile, "w")) = 0) { 

fprintf(stderr,"%s: can't write %s\n", prog, ofiie); 
cleanup(l), 

fprintf(fx, "<first sequence: %s (length = %d)W\ namex[0], lenO); 
fprintf(fx, "<second sequence* %s (length = %d)W\ namex[l], lenl); 
olen = 60; 
lx - lenO; 
ly = lenl, 

firstgap = lastgap = 0, 

if (dmax < lenl - I) { /* leading gap in x */ 

pp[0] spc = firstgap = lenl - dmax - 1; 
ly -= pp[0] spc; 

else if (dmax > lenl - I) { /* leading gap in y */ 
pp[l] spc = firstgap - dmax - (lenl - 1); 
lx pp[l]spc; 

} 

if (dmaxO < lenO - 1 ) { /* trailing gap m x */ 
lastgap = lenO - dmaxO -1 ; 
lx -= lastgap; 

else if (dmaxO > lenO - 1) { /* trailing gap in y */ 
lastgap = dmaxO - (lenO - 1); 
ly -== lastgap, 

} 

getmat(lx, ly, firstgap, lastgap); 
pr_align(); 
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/* 



* trace back the best path, count matches 
*/ 

static 

getmat(lx, ly, flrstgap, lastgap) 



/* "core" (minus endgaps) */ 
/* leading trailing overlap */ 



int lx, ly; / J 



int flrstgap, lastgap; / : 

{ 



int nm, iO, il, sizO, sizl; 

char outx[32]; 

double pet; 

register nO, nl; 

register char *p0, *p 1 ; 



/* get total matches, score 



*/ 



i0 = il = sizO = sizl =0; 
pO = seqx[0] + pp[l] spc; 
pi = seqxfl] + pp[0].spc; 
nO = pp[l]spc + 1; 
nl =pp[0].spc+ 1; 

nm = 0; 

while ( *p0 && *pl ) { 
if(sizO){ 

pi++; 

nl++; 

sizO— ; 

} 

else if (sizl) { 
p0++; 
n(H-+; 
sizl--; 

} 

else { 

if (xbm[*pO- T A t ]&xbm[*pl -'A']) 



if(nO++ = pp[0].x[iO]) 

sizO = pp[0].n[iO++]; 

if(nl-f+ = pp[l]-x[il]) 

sizl=pp[l].n[il-H-]; 

p0++; 

pi++; 



/* pet homology: 

* if penalizing endgaps, base is the shorter seq 

* else, knock off overhangs and take shorter core 
*/ 

if (endgaps) 

lx = (lenO < Ienl )? lenO : lenl ; 

else 

lx - Ox < ly)? lx : ly; 
pct= 100.*(double)nm/(double)lx; 
fprintf(fx, "\n"); 

fprintf(fx, "<%d match%s in an overlap of %d: %.2f percent simiIarity\iT, 
nm, (nm = 1 )? : "es", Ix, pet); 



nm++; 
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fprintf(fx, "<gaps in first sequence- %d", gapx); 
if (gapx) { 

(void) spnntf(outx, " (%d %s%s)", 

ngapx, (dna)? "base":"residue", (ngapx — 1)? " ,f :"s"); 
fprint^fx^VoS", outx); 



..getmat 



't- 5 



fprintf(fx, gaps in second sequence. %d", gapy); 
»f(gapy){ 

(void) sprintf(outx, " (%d %s%s)", 

ngapy, (dna)? "base". "residue", (ngapy = I)? " M :"s"); 

fprint^fx/ToS", outx); 



if (dna) 



else 



fprintf(fx, 

"\n<score %d (match = %d, mismatch = %d, gap penalty - %d + %d per base)\n" 
smax, DMAT, DMIS, DINSO, DINS l ); 



fprintf(fx, 

"\n<score: %d (Dayhoff PAM 250 matrix, gap penalty - %d + %d per residue)\n" 
smax, PINSO, PINSI), 
if (endgaps) 

fpnntf(fx, 

"<endgaps penalized left endgap- %d %s%s, right endgap: %d %s%s\n", 
firstgap, (dna) 9 "base" : "residue", (firstgap = I) 7 "" : "s", 
lastgap, (dna) 9 "base" : "residue", (lastgap == l) 9 "" : "s"); 



else 



fpnntf(fx, "<endgaps not penalized\n"), 



} 



static nm, 

static Imax, 

static ij[2], 

static nc[2], 

static ni[2], 

static s\z[2], 

static char *ps[2], 

static char *po[2], 

static char out[2][P_LINE], /* output line */ 

static char star(P_LrNE]; /* set by starsQ */ 



/* matches m core - for checking */ 
/* lengths of stripped file names */ 
/* jmp index for a path */ 
/* number at start of current line */ 
/* current elem number — for gapping */ 

/* ptr to current element V 
/* ptr to next output char slot */ 



* print alignment of described in struct path pp[] 
*/ 

static 



prjmgnO 
f 



int 
int 

register 



nn, 

more; 

i; 



/* char count */ 



pr align 



for (i = 0, [max = 0; i < 2; { 

nn = stripname(namex[i]), 
if (nn > Imax) 

Imax = nn, 



nc[i]= t; 
m[i] = i; 
siz[i] = = 0; 
ps[i] « seqx[i]; 
po[i] = ouf[i], 



} 
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for (nn = nm = 0, more = 1 ; more; ) { 

for (i = more = 0, l < 2, { 
/* 

* do we have more of this sequence 9 
*/ 

if('*i*M) 

continue, 

more++; 

if (pp[i] spc) { /* leading space */ 
*po[i]++ = ' '; 
pp[i].spc«, 

} 

else if (stz[i]) { /* in a gap */ 
*po[i]++ = '-\ 
siz[i]-; 

i 

else { /* we're putting a seq element 

*/ 

•pop] = *ps[i], 
if (islower(*ps[i])) 

*ps[i] = toupper(*ps[i]) T 

po[i]++, 
ps[i]++, 

/* 

* are we at next gap for this seq 9 
*/ 

if(m[.] = pp[i] x[ij[i]]){ 
/* 

* we need to merge all gaps 

* at this location 
*/ 

siz[i] = pp[i] n[y[i]++], 
while (ni[i] == pp[i] x[ij[i]]) 
sizp] += pp[i] n[y[ij 

} 

m[i]-H+; 

} 

} 

if (-H-nn == olen || !more Sc8c nn) { 
dumpblock(), 
for(t-0, i<2, 

po[i] = out[i], 

nn = 0; 

} 

} 

) 

f* 

* dump a block of lines, including numbers, stars: pr_align() 
*/ 

static 

dumpblock() 
{ 

register l, 

for (i = 0, i<2, 

*po[i]---'\0 , ; 
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...dumpblock 

(void) putc( T \n', fx); 
for(i = 0 ? i<2; { 

if (*out[i] && (*out[i] !=' ' || *(po[i]) != ' ')) { 

if 0 — 0) 

nums(i); 
if (i ===== 0 && *out[I]) 
stars(); 

putline(i); 

if(i==0&& *out[I]) 

fpnntf(fx, star); 

if (i = l) 

nums(i); 

} 

} 



} 

/* 

* put out a number line: dumpblockO 
*/ 

static 

nums(ix) 

int i\, /* index in out[] holding seq line */ 

{ 

char nhne[P_LINE], 
register 

register char *pn, *px, *py; 

for (pn = nhne, i = 0, i < lmax+P^SPC; t++, pn++) 
*pn = ' 

for (i = nc[ix], py = out[ix] ? *py, py++, pn++) { 

if(*py=="j|*py=-V) 

*pn = ' 



else { 



if (i%10 0 |i (i === 1 && nc[ix] !-!)){ 

j - 0 < or -i ■ i, 

for (px - pn;j; j /== 10, px~) 
*px=j%10 + '0' ; 

if(i<0) 

*px - 



else 



*pn = * '; 



} 

*pn = *\0'; 
nc[ix] = i, 

for (pn = nhne; *pn; pn++) 

(void) putc(*pn, fx); 
(void) putc('\n', fx); 

} 

/* 

* put out a line (name, [num], seq, jnum]). dumpblock() 
*/ 

static 

putlme(ix) putHne 
int ix; 

{ 
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...putline 



register char *px; 

for (px = namexpx], i = 0; *px && *px 1= px++, 

(void) putc(*px, fx); 
for (; i < lmax+P_SPC; 

(void)putc(' \fx); 

/* these count from 1 : 

* ni[] is current element (from 1) 

* nc[] is number at start of current line 
*/ 

for (px = out[ix]; *px; px++) 

(void) putc(*px&0x7F, fx); 
(void) ptitcCW, fie); 

} 



/* 

* put a line of stars (seqs always in outfO], out[l]): dumpblock() 
*/ 

static 



if (!*out[0] j| (*out[0] — * ' && *(po[0]) = ' ') || 
!*out[l] || (*out[l] ==••&& *(po[l]) = ' ')) 
return; 

px = star; 

for (i = lmax+P_SPC; i; i») 
*px-H- = "; 

for (pO = out[0], pi = out[l]; *pO && *pl ; p0++, pl++) { 
if (isalpha(*pO) && isalpha(*pl)) { 



starsQ 



stars 



int 

register cbar 



*p0, *pl,cx, *px; 



if (xbm[*p0-W3&xbm[*pl- T A']) { 




} 

else if (!dna &&_day[*pO-'A , ][*pl- t A'] > 0) 

cx = V; 



else 



cx = 



else 



cx = 



= 1 



*px++ 



cx; 



} 

*px++ = V; 
*px = W; 



} 
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/* 

* strip path or prefix from pn, return len: pr_align0 
*/ 

/*
 * Strip any leading directory path from pn, in place.
 *
 * pn -- file name (may be a path); modified so that it holds only the
 *       text after the last '/'.  A name without '/' is left untouched.
 *
 * Returns the length of the resulting name.
 */
static int
stripname(char *pn)
{
	register char	*px, *py;

	py = 0;
	for (px = pn; *px; px++)
		if (*px == '/')
			py = px + 1;
	if (py) {
		/*
		 * Source and destination overlap, which strcpy() does not
		 * allow; a forward byte copy is safe because py > pn.
		 */
		for (px = pn; (*px = *py) != '\0'; px++, py++)
			;
	}
	/* px now rests on the terminating NUL in both cases */
	return((int)(px - pn));
}
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/* 

 * cleanup() -- cleanup any tmp file
 * getseq() -- read in seq, set dna, len, maxlen
 * g_calloc() -- calloc() with error checking
 * readjmps() -- get the good jmps, from tmp file if necessary
 * writejmps() -- write a filled array of jmps to a tmp file: nw()
*/ 

#include "nw.h"
#include <sys/file.h>



/*
 * Template for the tmp file holding jmps.  Kept as a writable array, not a
 * pointer to a string literal, because writejmps() hands it to mktemp(),
 * which overwrites the trailing X's in place.
 */
char	jname[] = "/tmp/homgXXXXXX";	/* tmp file for jmps */
FILE	*fj;				/* stream on the tmp file; 0 until it is opened */

int	cleanup();			/* cleanup tmp file */
long	lseek();



/* 



remove any tmp file if we blow 



*/ 

cleanup(i) 
f 



int 

^ (5) 
exit(i), 



(void) unlinkOname), 



cleanup 



/* 

* read, return ptr to seq, set dna, len, maxlen 

 * skip lines starting with ';', '<', or '>'

* seq in upper or lower case 
*/ 

char * 

getseq(file, len) 

char *file, /* file name*/ 
int *len; /* seq len */ 



char 

register char 
int 

FILE 



lme[1024], *pseq; 
*px, *py; 
natgc, tlen; 



getseq 



if((fp = fopen(file,V)) = 0){ 

fpnntf(stderr,"%s: can't read %s\n", prog, file), 
exit(l), 

\ 

tlen = natgc = 0, 

while (fgets(line, 1024,fp)){ 

If (*lme = V ii *lme — || *hne = V) 

continue; 
for (px = line; *px != V; px++) 

if (isupper(*px) || islower(*px)) 
tlen-H-; 

if ((pseq - mailoc((unsigned)(t!en+6))) = 0) { 

fprintf(stderr,"%s: malloc() failed to get %d bytes for %s\n", prog, tIen+6, file); 
exit(l), 

pseq[0] = Pseq[l] = pseq[2] = pseq[3] = *\0'; 
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...getseq 

py = pseq + 4; 
*len = tlen, 
rewind(fp), 

while (fgetsfline, 1024, fp)) { 

if (*hne = ■;' || *lme — || *line = '>') 

continue; 
for (px = line, *px != '\n', px++) { 
if (isupper(*px)) 

*py-M- = *px, 
else if (islower(*px)) 

*py++ = toupper(*px), 
if(mdex( M ATGCU M ,*(py-l))) 
natgc++, 

} 

} 

*py-H- = *\0'; 
*py = "\0*; 
(void) fclose(fp), 
dna = natgc > (tlen/3); 
return(pseq-i-4}, 



} 



char * 

g_caIloc(msg, nx } sz) g calloc 

char *msg, /* program, calling routine */ 

int nx 3 sz, /* number and size of elements */ 

{ 

char *px, *ca!loc(); 

if ((px = ca!Ioc((unsigned)nx, (unsigned)sz)) = 0) { 
if(*msg){ 

fpnntf(stdenr, "%s g_calloc() failed %s (n=%d, sz=%d)\n", prog, msg, nx, sz), 
exit(l), 

\ 

} 

return(px), 

} 

/* 

* get final jmps from dx[] or tmp file, set pp[], reset dmax- main() 
*/ 

readjmps() readjmps 
{ 

int fd = -l; 

int siz, iO, il, 

register xx, 

(void) fclose(fj); 

if ((fd = open(jname, OJRDONLY, 0)) < 0) { 

fprintf(stderr, "%s: can't open() %s\n", prog, jname); 
cleanup(i); 

} 

) 

for (i = iO = il = 0, dmaxO = dmax, xx = lenO; ; { 
while (1){ 

for (j = dx[dmax] tjmp; j >= 0 && dx[dmax] jp.xft] >= xx; j-) 
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...readjmps 

if (j < 0 && dx[dmax] offset && fj) { 

(void) Iseck(fd, dxfdmax]. offset, 0); 

(void) read(fd, (char *)&dx[dmax].jp, sizeof(struct jmp)); 

(void) read(fd, (char *)&dx[dmax].offset, sizeof(dx[dmax] offset)); 

dx[dmax] ijmp = MAXJMP-1; 

} 

else 

break; 

} 

if 0 >= JMPS){ 

fpnntf(stderr, "%s: too many gaps in alignment^", prog); 
cleanup(l), 

} 

if(j>=0){ 

siz = dxfdmax] jp n(j], 
xx = dx[dmax].jp xjj]; 
=s: i dmax +- siz; 

=S if (siz < 0) { /* gap in second seq */ 

Si PPlH n[il] = -siz; 

y xx += siz, 

y /* id = xx -yy + lenl - 1 

nr* */ 

^ pp[ 1 ].x[i 1 ] = xx - dmax + len 1 - 1 ; 

gapy++; 
ngapy -= siz; 
/* ignore MAXGAP when doing endgaps */ 

siz - (-siz < MAXGAP || endgaps)? -siz ■ MAXGAP, 

} 

else if (siz > 0) { /* gap in first seq */ 
pp[0] n[iO] = siz, 
pp[0] x[i0] = xx; 
gapx++, 
ngapx += siz, 
/* ignore MAXGAP when doing endgaps */ 

siz = (siz < MAXGAP |j endgaps)? siz : MAXGAP, 
i0++; 

} 

} 

else 

break, 

} 

/* reverse the order of jmps 
*/ 

for 0 = 0, i0--;j<i0;j++, i0~) { 

i = pp[0] rife]; pp[0] nU] = pp[0].n[i0]; pp[0].n[i0] = i, 
i = pp[0].xW; pp[0].xO] = pp[0] x[i0], pp[0] x[i0] = i; 

} 

for0 = 0,il-;j<il;j++,il-){ 

i = pp[l]-n[j]; pp[l] n[j] = pp[l].n[il]; pp[l].n[il] = i; 
i = pp[l].xfi]; pp[l].xD] = PP[l].x[H]; PP[l]-x[il] = i; 

} 

if(fd>= 0) 

(void) close(fd); 

if (Q) { 

(void) unhnkOname); 
offset = 0; 
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/* 

* write a filled jmp struct offset of the prev one (if any): nw() 
*/ 

writejmps(ix) 

int ix; 

{ 

char *mktempO; 

if (mktempQname) < 0) { 

fprintf(stderr, "%s: can't mktempO %s\n", prog, jname); 
cleanup(l); 

} 

if ((f) = fopen(jname, V)) = 0) { 

fprintf(stderr, "%s: can't write %s\n", prog, jname); 
exit(l); 

} 

(void) fwrite((char *)&dx[ix] jp, sizeof(struct jmp), I, fj); 
(void) fwrite((char *)&dx[ix]. offset, sizeof(dx[ix]. offset), 1, fj); 
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